# Import the trip sample.
# The file is addressed by its full path instead of calling setwd(), so that
# loading the data does not change the session's working directory.
project_dir <- path.expand("~/Class/Winter/DataViz/Project")
coc <- read_csv(file.path(project_dir, "data samples", "9Sep2019.csv"))
## Parsed with column specification:
## cols(
## .default = col_double(),
## `Trip ID` = col_character(),
## `Trip Start Timestamp` = col_character(),
## `Trip End Timestamp` = col_character(),
## `Shared Trip Authorized` = col_logical(),
## `Pickup Centroid Location` = col_character(),
## `Dropoff Centroid Location` = col_character()
## )
## See spec(...) for full column specifications.
# Print per-column summary statistics for the imported trips: quartiles and
# NA counts for numeric columns, length/class for character columns, and
# TRUE/FALSE counts for logical columns.
summary(coc)
## Trip ID Trip Start Timestamp Trip End Timestamp Trip Seconds
## Length:4000 Length:4000 Length:4000 Min. : 5.0
## Class :character Class :character Class :character 1st Qu.: 570.8
## Mode :character Mode :character Mode :character Median : 888.5
## Mean :1122.0
## 3rd Qu.:1452.0
## Max. :6277.0
##
## Trip Miles Pickup Census Tract Dropoff Census Tract
## Min. : 0.000 Min. :1.703e+10 Min. :1.703e+10
## 1st Qu.: 2.000 1st Qu.:1.703e+10 1st Qu.:1.703e+10
## Median : 3.800 Median :1.703e+10 Median :1.703e+10
## Mean : 6.443 Mean :1.703e+10 Mean :1.703e+10
## 3rd Qu.: 8.300 3rd Qu.:1.703e+10 3rd Qu.:1.703e+10
## Max. :76.900 Max. :1.703e+10 Max. :1.703e+10
## NA's :1120 NA's :1171
## Pickup Community Area Dropoff Community Area Fare
## Min. : 1.00 Min. : 1.0 Min. : 0.00
## 1st Qu.: 8.00 1st Qu.: 8.0 1st Qu.: 5.00
## Median :24.00 Median :24.0 Median : 10.00
## Mean :26.01 Mean :26.5 Mean : 12.07
## 3rd Qu.:32.00 3rd Qu.:32.0 3rd Qu.: 15.00
## Max. :77.00 Max. :77.0 Max. :117.50
## NA's :230 NA's :302
## Tip Additional Charges Trip Total Shared Trip Authorized
## Min. : 0.0000 Min. : 0.000 Min. : 0.72 Mode :logical
## 1st Qu.: 0.0000 1st Qu.: 2.550 1st Qu.: 8.55 FALSE:3408
## Median : 0.0000 Median : 2.550 Median : 12.55 TRUE :592
## Mean : 0.7445 Mean : 3.074 Mean : 15.88
## 3rd Qu.: 0.0000 3rd Qu.: 2.550 3rd Qu.: 17.55
## Max. :20.0000 Max. :20.760 Max. :127.95
##
## Trips Pooled Pickup Centroid Latitude Pickup Centroid Longitude
## Min. :1.000 Min. :41.65 Min. :-87.91
## 1st Qu.:1.000 1st Qu.:41.88 1st Qu.:-87.68
## Median :1.000 Median :41.89 Median :-87.65
## Mean :1.142 Mean :41.89 Mean :-87.67
## 3rd Qu.:1.000 3rd Qu.:41.93 3rd Qu.:-87.63
## Max. :6.000 Max. :42.02 Max. :-87.53
## NA's :225 NA's :225
## Pickup Centroid Location Dropoff Centroid Latitude Dropoff Centroid Longitude
## Length:4000 Min. :41.65 Min. :-87.91
## Class :character 1st Qu.:41.88 1st Qu.:-87.68
## Mode :character Median :41.89 Median :-87.65
## Mean :41.89 Mean :-87.67
## 3rd Qu.:41.93 3rd Qu.:-87.63
## Max. :42.02 Max. :-87.53
## NA's :295 NA's :295
## Dropoff Centroid Location
## Length:4000
## Class :character
## Mode :character
##
##
##
##
Now let’s look at all the pick-up locations:
# Base map for the pick-up plot: a Google "roadmap" image centred on
# Chicago (41.8781° N, 87.6298° W) at zoom level 11.
# `myKey` (the Google Maps API key) must already be defined.
p <- ggmap(
  get_googlemap(
    center = c(lon = -87.6298, lat = 41.8781),
    zoom = 11,
    scale = 2,
    maptype = "roadmap",
    color = "color",
    key = myKey
  )
)
## Source : https://maps.googleapis.com/maps/api/staticmap?center=41.8781,-87.6298&zoom=11&size=640x640&scale=2&maptype=roadmap&key=xxx
# Overlay one small dot per trip at its pick-up centroid on the base map `p`.
# Rows without plottable coordinates are dropped by ggplot2 with a warning.
p +
  geom_point(
    data = coc,
    mapping = aes(
      x = `Pickup Centroid Longitude`,
      y = `Pickup Centroid Latitude`
    ),
    size = 0.7
  ) +
  theme(legend.position = "bottom")
## Warning: Removed 473 rows containing missing values (geom_point).
Now let’s look at all the drop-off locations:
# Base map for the drop-off plot: a Google "terrain" image with the same
# Chicago centre point and zoom level 11.
# `myKey` (the Google Maps API key) must already be defined.
d <- ggmap(
  get_googlemap(
    center = c(lon = -87.6298, lat = 41.8781),
    zoom = 11,
    scale = 2,
    maptype = "terrain",
    color = "color",
    key = myKey
  )
)
## Source : https://maps.googleapis.com/maps/api/staticmap?center=41.8781,-87.6298&zoom=11&size=640x640&scale=2&maptype=terrain&key=xxx
# Overlay one small dot per trip at its drop-off centroid on the base map `d`.
# Rows without plottable coordinates are dropped by ggplot2 with a warning.
d +
  geom_point(
    data = coc,
    mapping = aes(
      x = `Dropoff Centroid Longitude`,
      y = `Dropoff Centroid Latitude`
    ),
    size = 0.7
  ) +
  theme(legend.position = "bottom")
## Warning: Removed 535 rows containing missing values (geom_point).
######### REPEAT ABOVE WITH ZOOM #########
### Pick-Up Density
Now let’s look at all the pick-up locations again, this time zoomed in:
# Rebuild the pick-up base map one zoom level closer (zoom 12), this time
# with "terrain" imagery. Centre: Chicago (41.8781° N, 87.6298° W).
# `myKey` (the Google Maps API key) must already be defined.
p <- ggmap(
  get_googlemap(
    center = c(lon = -87.6298, lat = 41.8781),
    zoom = 12,
    scale = 2,
    maptype = "terrain",
    color = "color",
    key = myKey
  )
)
## Source : https://maps.googleapis.com/maps/api/staticmap?center=41.8781,-87.6298&zoom=12&size=640x640&scale=2&maptype=terrain&key=xxx
# Pick-up centroids on the zoomed-in base map `p`. The smaller map extent
# means more rows fall outside it and are dropped with a warning.
p +
  geom_point(
    data = coc,
    mapping = aes(
      x = `Pickup Centroid Longitude`,
      y = `Pickup Centroid Latitude`
    ),
    size = 0.7
  ) +
  theme(legend.position = "bottom")
## Warning: Removed 1200 rows containing missing values (geom_point).
Now let’s look at all the drop-off locations again, this time zoomed in:
# Rebuild the drop-off base map at zoom level 14 ("terrain" imagery), with
# the same Chicago centre point.
# `myKey` (the Google Maps API key) must already be defined.
d <- ggmap(
  get_googlemap(
    center = c(lon = -87.6298, lat = 41.8781),
    zoom = 14,
    scale = 2,
    maptype = "terrain",
    color = "color",
    key = myKey
  )
)
## Source : https://maps.googleapis.com/maps/api/staticmap?center=41.8781,-87.6298&zoom=14&size=640x640&scale=2&maptype=terrain&key=xxx
# Drop-off centroids on the zoomed-in base map `d`. The smaller map extent
# means more rows fall outside it and are dropped with a warning.
d +
  geom_point(
    data = coc,
    mapping = aes(
      x = `Dropoff Centroid Longitude`,
      y = `Dropoff Centroid Latitude`
    ),
    size = 0.7
  ) +
  theme(legend.position = "bottom")
## Warning: Removed 2869 rows containing missing values (geom_point).
It is a bit tricky to see the density of the pick-ups and drop-offs because all the points sit on top of each other. The code below sets the `alpha` parameter, which makes the dots transparent and helps reveal the density of the plotted points.
# Same pick-up points, drawn smaller and semi-transparent (alpha = 0.25) so
# that overlapping points accumulate into darker areas. Legend suppressed.
p +
  geom_point(
    data = coc,
    mapping = aes(
      x = `Pickup Centroid Longitude`,
      y = `Pickup Centroid Latitude`
    ),
    colour = "#011f4b",
    size = 0.5,
    alpha = 0.25
  ) +
  theme(legend.position = "none")
## Warning: Removed 1200 rows containing missing values (geom_point).
# Filled 2-D kernel-density contours of the pick-up locations (30 bins),
# with fill colour mapped to the density level.
# `alpha` is set as a constant OUTSIDE aes(): inside aes() ggplot2 treats the
# number as data, rescales it through the alpha scale (so 0.15 is not the
# opacity actually drawn) and adds a meaningless alpha legend.
p +
  stat_density2d(
    aes(
      x = `Pickup Centroid Longitude`,
      y = `Pickup Centroid Latitude`,
      fill = ..level..
    ),
    data = coc,
    geom = "polygon",
    alpha = 0.15,
    size = 0.01,
    bins = 30
  ) # +
## Warning: Removed 1200 rows containing non-finite values (stat_density2d).
#geom_point(aes(x = x, y = y, stroke = 2), colour=col4, data = n, size =1.5) +
#geom_label_repel(aes(x, y, label = label), data=n, family = 'Times', size = 3, box.padding = 0.2, point.padding = 0.3, segment.color = 'grey50')
# Filled density polygons (40 bins) plus contour lines for the pick-up
# locations, both drawn on the base map `p`.
# `alpha` is set as a constant OUTSIDE aes(): inside aes() ggplot2 treats the
# number as data, rescales it through the alpha scale (so 0.25 is not the
# opacity actually drawn) and adds a meaningless alpha legend.
p +
  stat_density2d(
    aes(
      x = `Pickup Centroid Longitude`,
      y = `Pickup Centroid Latitude`,
      fill = ..level..
    ),
    data = coc,
    geom = "polygon",
    alpha = 0.25,
    size = 0.1,
    bins = 40
  ) +
  # Contour outlines on top of the filled polygons.
  geom_density2d(
    aes(
      x = `Pickup Centroid Longitude`,
      y = `Pickup Centroid Latitude`
    ),
    data = coc,
    size = 0.3
  )
## Warning: Removed 1200 rows containing non-finite values (stat_density2d).
## Warning: Removed 1200 rows containing non-finite values (stat_density2d).
#END